A comparison of JULES-ES-1p0 wave01 members against the original ensemble (wave00).
Wave 01 input parameter sets were picked using History matching to fall within Andy Wiltshire’s basic constraints on NBP, NPP, cSoil and cVeg stocks at the end of the 20th century. We use 300 of the 500 members, keeping back 2/5ths for emulator validation later.
We answer some basic questions.
What proportion of the new ensemble match AW’s constraints?
What do the timeseries of carbon cycle properties look like with and without AW’s constraints?
How good is a GP emulator? Does it get better overall with the new ensemble members added? In particular, does it get better for those members within the AW constraints?
Does comparison of the ensemble with Atmospheric growth observations give more of a constraint?
To do:
Does the sensitivity analysis change?
Load libraries, functions and data.
# Root directory of the postprocessed wave01 (u-ck006) JULES-ES-1p0 ensemble.
ensloc <- '/scratch/hadaw/jules_postprocess/u-ck006/'
makeJulesEnsembleModernValue <- function(ensloc, varlist, nstart, nend, ix = 144:164){
# Build a matrix of "modern value" summaries (via modernValue, averaged over
# timesteps ix) for each variable in varlist, across ensemble members
# nstart:nend.
#
# Args:
#   ensloc:       path to the ensemble postprocessing directory.
#   varlist:      character vector of NetCDF variable names to extract.
#   nstart, nend: first and last ensemble member numbers (inclusive).
#   ix:           timestep indices passed to modernValue (default 144:164;
#                 presumably the late 20th century of an 1850-start annual
#                 run - confirm against the NetCDF time axis).
# Returns: list(datmat = nens x length(varlist) matrix (NA rows for members
#          that failed to open/read), enslist = character member IDs).
nens <- (nend - nstart) + 1
datmat <- matrix(nrow = nens, ncol = length(varlist))
colnames(datmat) <- varlist
enslist <- paste("P", formatC(nstart:nend, width = 4, flag = "0"), sep = "")
for(i in 1:nens){
  # Default to NA so a failed open/read leaves an NA row for this member.
  vec <- rep(NA, length(varlist))
  # Fix: reset nc each iteration. Previously a failed nc_open left nc bound
  # to the PREVIOUS member's handle, so the sapply silently re-read the
  # previous file and nc_close() closed a stale (or undefined) handle.
  nc <- NULL
  ensmember <- enslist[i]
  fn <- paste0(ensloc, ensmember, '/stats/', 'JULES-ES-1p0_', ensmember, '_Annual_global.nc')
  try(nc <- nc_open(fn))
  if(!is.null(nc)){
    try(vec <- sapply(varlist, FUN = modernValue, nc = nc, ix = ix))
    nc_close(nc)
  }
  datmat[i, ] <- vec
}
return(list(datmat = datmat, enslist = enslist))
}
# Wave01 member numbers P0499-P0798: 300 of the 500 runs (the remainder are
# held back for emulator validation).
nstart <- 499
nend <- 798
#if (file.exists("ensemble_wave01.rdata")) {
# load("ensemble_wave01.rdata")
#} else {
# Extract the late-20th-century "modern value" summary for each member.
ens_wave01_mv <- makeJulesEnsembleModernValue(ensloc = '/scratch/hadaw/jules_postprocess/u-ck006/',
varlist = y_names_sum,
nstart = nstart,
nend = nend,
ix = 144:164)
# save(ens_wave01_mv, file ="ensemble_wave01.rdata")
#}
# Input design: normalise the wave01 latin hypercube relative to the combined
# (wave00 + wave01) parameter ranges.
lhs_wave01 <- read.table( '../conf_files_augment_JULES-ES-1p0/lhs_example.txt', header = TRUE)
X_wave01 = normalize(lhs_wave01, wrt = rbind(lhs_i, lhs_ii, lhs_wave01))
colnames(X_wave01) = colnames(lhs_wave01)
# Match the 300 outputs we're using in the training data
X_wave01_train <- X_wave01[1:300,]
# Constraint outputs, with each column scaled by scalevec (presumably into the
# GtC / GtC-per-year units used in the plots below - confirm).
Y_const_wave01 <- ens_wave01_mv$datmat[, ynames_const]
Y_const_wave01_scaled <- sweep(Y_const_wave01, 2, STATS = scalevec, FUN = '/' )
There are no NAs in the output, but some relative humidity values are infinite. There are no “low NPP” ensemble members.
# Index of any "low NPP" members (raw modern NPP below 1e5); expect none.
low_npp_ix_wave01 <- which(ens_wave01_mv$datmat[,'npp_nlim_lnd_sum'] < 1e5)
min(ens_wave01_mv$datmat[,'npp_nlim_lnd_sum'])
[1] 117464.6
# "Level 0" exclusion index: members where NBP is NA (failed/incomplete runs).
Y_wave01_nlevel0_ix <- which(is.na(ens_wave01_mv$datmat[,'nbp_lnd_sum']))
# Check the whole summary matrix for non-finite values.
all(is.finite(ens_wave01_mv$datmat))
[1] FALSE
which(!is.finite(ens_wave01_mv$datmat), arr.ind = TRUE)
row col
[1,] 140 9
[2,] 232 9
[3,] 249 9
[4,] 300 9
# All four non-finite entries are Inf, and all sit in column 9 (rh_lnd_sum).
ens_wave01_mv$datmat[which(!is.finite(ens_wave01_mv$datmat), arr.ind = TRUE)]
[1] Inf Inf Inf Inf
colnames(ens_wave01_mv$datmat)[9]
[1] "rh_lnd_sum"
Global mean for the 20 years at the end of the 20th Century. There is still a significant low bias on cVeg output.
# Colours for the two waves.
# NOTE(review): the first two assignments are dead stores - immediately
# overwritten by the dodgerblue2/firebrick pair below.
wave00col <- 'skyblue2'
wave01col <- 'tomato2'
wave00col <- 'dodgerblue2'
wave01col <- 'firebrick'
rangecol <- 'grey'
# Histogram of level 1 constraints
hcol = 'darkgrey'
lcol = 'black'
#pdf(file = 'figs/level_2_constraints_hists.pdf', width = 6, height = 5)
# 2x2 panel layout, with outer top margin reserved for a shared legend.
par(mfrow = c(2,2), fg = 'darkgrey', las = 1, oma = c(0.1, 0.1, 4, 0.1))
# Keep only the elements of x strictly inside the range spanned by vec.
# Used to drop wave01 values lying outside the wave00 histogram breaks before
# overplotting. NB: this masks base::trunc within the script.
trunc <- function(x, vec){
  lo <- min(vec)
  hi <- max(vec)
  x[x > lo & x < hi]
}
# NBP panel: the wave00 histogram sets the breaks; wave01 values outside the
# break range are truncated before overplotting. The rug marks the standard
# member; the grey polygon shades AW's acceptable range (0-100 GtC/year).
h <- hist(Y_const_level1a_scaled[,'nbp_lnd_sum'], main = 'NBP', xlab = 'GtC/year', col = makeTransparent(wave00col,150))
hist(trunc(Y_const_wave01_scaled [,'nbp_lnd_sum'], h$breaks) ,
col = makeTransparent(wave01col,150) , breaks = h$breaks, add = TRUE)
rug(Y_const_stan_scaled['nbp_lnd_sum'], lwd = 2)
polygon(x = c(0, 100, 100, 0), y = c(0, 0, 1000, 1000),
col = makeTransparent(rangecol, 60),
border = makeTransparent(rangecol))
# NPP panel, same construction as the NBP panel: wave00 histogram sets the
# breaks, wave01 is truncated to those breaks and overplotted; grey polygon
# shades AW's range (35-80 GtC/year).
h <- hist(Y_const_level1a_scaled[,'npp_nlim_lnd_sum'],col = makeTransparent(wave00col,150), main = 'NPP', xlab = 'GtC/year')
# Fix: pass alpha = 150 to makeTransparent for the wave01 overlay, consistent
# with the NBP, cSoil and cVeg panels (it was omitted here).
hist(trunc(Y_const_wave01_scaled[,'npp_nlim_lnd_sum'], h$breaks),
     col = makeTransparent(wave01col, 150), breaks = h$breaks, add = TRUE)
rug(Y_const_stan_scaled['npp_nlim_lnd_sum'], lwd = 2)
polygon(x = c(35, 80, 80, 35), y = c(0, 0, 1000, 1000),
        col = makeTransparent(rangecol, 60),
        border = makeTransparent(rangecol))
# Soil carbon panel; AW range 750-3000 GtC.
h <- hist(Y_const_level1a_scaled[,'cSoil_lnd_sum'], col = makeTransparent(wave00col,150), main = 'Soil Carbon', xlab = 'GtC')
hist(trunc(Y_const_wave01_scaled [,'cSoil_lnd_sum'], h$breaks) ,
col = makeTransparent(wave01col,150) , breaks = h$breaks, add = TRUE)
rug(Y_const_stan_scaled['cSoil_lnd_sum'], lwd = 2)
polygon(x = c(750, 3000, 3000, 750), y = c(0, 0, 1000, 1000),
col = makeTransparent(rangecol, 60),
border = makeTransparent(rangecol))
# Vegetation carbon panel; AW range 300-800 GtC.
h <- hist(Y_const_level1a_scaled[,'cVeg_lnd_sum'], col = makeTransparent(wave00col,150), main = 'Vegetation Carbon', xlab = 'GtC')
hist(trunc(Y_const_wave01_scaled [,'cVeg_lnd_sum'], h$breaks) ,
col = makeTransparent(wave01col,150) , breaks = h$breaks, add = TRUE)
rug(Y_const_stan_scaled['cVeg_lnd_sum'], lwd = 2)
polygon(x = c(300, 800, 800, 300), y = c(0, 0, 1000, 1000),
col = makeTransparent(rangecol, 60),
border = makeTransparent(rangecol))
#dev.off()
# Reset the plotting region, then draw a shared legend over the whole device.
reset()
legend('top', horiz = TRUE, fill = c(makeTransparent(wave00col, 150), makeTransparent(wave01col, 150), makeTransparent(rangecol, 60)), legend = c('Wave00', 'Wave01', 'AW range'))
A third! Better than before, but still not great. This points at a significant model discrepancy in cVeg.
# AW's (Andy Wiltshire's) acceptable ranges for the four constraint outputs;
# rows are min/max, columns match ynames_const.
AW_constraints <- matrix(nrow = 2, ncol = length(ynames_const))
AW_constraints[1,] <- c(0, 35, 750, 300)
AW_constraints[2,] <- c(100, 80, 3000, 800)
colnames(AW_constraints) <- ynames_const
rownames(AW_constraints) <- c('min', 'max')
# conform to Andy's basic constraints
#level2_ix_wave01 <- which(apply(Y_const_wave01_scaled, 1, FUN = withinRange, maxes = AW_constraints[2,], mins = AW_constraints[1,] ))
#nlevel2_ix_wave01 <- which(apply(Y_const_wave01_scaled, 1, FUN = withinRange, maxes = AW_constraints[2,], mins = AW_constraints[1,] ) == FALSE)
# Hand-coded version of the commented-out withinRange call above.
# NOTE(review): NBP is only bounded below (> 0) here, although AW_constraints
# also specifies an upper bound of 100 - confirm the omission is intentional.
level2_ix_wave01 <- which(Y_const_wave01_scaled[,'nbp_lnd_sum'] > 0 &
Y_const_wave01_scaled[,'npp_nlim_lnd_sum'] > 35 & Y_const_wave01_scaled[,'npp_nlim_lnd_sum'] < 80 &
Y_const_wave01_scaled[,'cSoil_lnd_sum'] > 750 & Y_const_wave01_scaled[,'cSoil_lnd_sum'] < 3000 &
Y_const_wave01_scaled[,'cVeg_lnd_sum'] > 300 & Y_const_wave01_scaled[,'cVeg_lnd_sum'] < 800
)
Of the 300 members of the wave01 ensemble, 100 pass Andy Wiltshire’s Level 2 constraints.
length(level2_ix_wave01)
[1] 100
Pairs plot of the inputs that pass the constraints with respect to the limits of the original ensemble.
# Pairs plot of the wave01 inputs that pass the level-2 constraints, drawn on
# the full normalised [0,1] input space so gaps show where constraints bite.
pairs(X_wave01[level2_ix_wave01, ],
xlim = c(0,1),
ylim = c(0,1),
gap = 0,
lower.panel = NULL,
pch = 20,
col = makeTransparent(wave01col,200)
)
## ----------------------------------------------------------------------
## Load ensemble
## ----------------------------------------------------------------------
# Wave00 timeseries: load from cache if present, otherwise extract per variable.
# NOTE(review): these calls pass only `variable`, but the makeTimeseriesEnsemble
# defined later in this file requires ensloc/nstart/nend with no defaults -
# presumably an earlier definition with defaults was in scope; confirm.
# NOTE(review): this block saves un-suffixed names to the SAME cache file
# ("ensemble_timeseries_wave01.rdata") that the wave01 block below also uses
# for its *_wave01 variables - the two caches clash; confirm.
if (file.exists("ensemble_timeseries_wave01.rdata")) {
load("ensemble_timeseries_wave01.rdata")
} else {
# primary carbon cycle outputs
# Fluxes are divided by (1e12/ysec) and stocks by 1e12 - presumably converting
# to GtC/year and GtC respectively (matches plot axis labels); confirm units.
npp_ens <- makeTimeseriesEnsemble(variable = "npp_nlim_lnd_sum") / (1e12/ysec)
nbp_ens <- makeTimeseriesEnsemble(variable = "nbp_lnd_sum") / (1e12/ysec)
cSoil_ens <- makeTimeseriesEnsemble(variable = "cSoil_lnd_sum") / 1e12
cVeg_ens <- makeTimeseriesEnsemble(variable = "cVeg_lnd_sum") / 1e12
lai_lnd_mean_ens <- makeTimeseriesEnsemble(variable = "lai_lnd_mean")
# fluxes
rh_lnd_sum_ens <- makeTimeseriesEnsemble(variable = "rh_lnd_sum") / (1e12/ysec)
fLuc_lnd_sum_ens <- makeTimeseriesEnsemble(variable = "fLuc_lnd_sum") / (1e12/ysec)
fHarvest_lnd_sum_ens <- makeTimeseriesEnsemble(variable = "fHarvest_lnd_sum") / (1e12/ysec)
# fractions
treeFrac_lnd_mean_ens <- makeTimeseriesEnsemble(variable = "treeFrac_lnd_mean")
shrubFrac_lnd_mean_ens <- makeTimeseriesEnsemble(variable = "shrubFrac_lnd_mean")
baresoilFrac_lnd_mean_ens <- makeTimeseriesEnsemble(variable = "baresoilFrac_lnd_mean")
#c3PftFrac_lnd_mean_ens <- makeTimeseriesEnsemble(variable = "c3PftFrac_lnd_mean_ens")
#c4PftFrac_lnd_mean_ens <- makeTimeseriesEnsemble(variable = "c4PftFrac_lnd_mean_ens")
save(npp_ens, nbp_ens, cSoil_ens, cVeg_ens, lai_lnd_mean_ens, rh_lnd_sum_ens, fLuc_lnd_sum_ens, fHarvest_lnd_sum_ens,
treeFrac_lnd_mean_ens, shrubFrac_lnd_mean_ens, baresoilFrac_lnd_mean_ens, file = "ensemble_timeseries_wave01.rdata" )
}
# Total land carbon store = soil carbon + vegetation carbon, per member/year.
total_land_carbon_ens <- cSoil_ens + cVeg_ens
makeTimeseriesEnsemble <- function(ensloc, variable, nstart, nend, nts = 164, cn = 1850:2013){
# Extract an annual global timeseries of `variable` for each ensemble member
# nstart:nend, returning a (members x years) matrix.
#
# Args:
#   ensloc:       path to the ensemble postprocessing directory.
#   variable:     NetCDF variable name to extract.
#   nstart, nend: first and last ensemble member numbers (inclusive).
#   nts:          number of timesteps (default 164 annual means).
#   cn:           column names, default the years 1850:2013.
# Returns: nens x nts matrix; rows of NA for members that failed to open/read.
nens <- (nend - nstart) + 1
datmat <- matrix(NA, nrow = nens, ncol = nts)
colnames(datmat) <- cn
enslist <- paste("P", formatC(nstart:nend, width = 4, flag = "0"), sep = "")
for(i in 1:nens){
  # Fix: the original filled `vec` with NA but then assigned/used `dat`, so a
  # failed extraction left `dat` undefined (first iteration) or stale from the
  # previous member (later iterations). Initialise the variable actually used.
  dat <- rep(NA, nts)
  # Fix: reset nc each iteration so a failed nc_open cannot leave a stale
  # handle that gets read from / closed again.
  nc <- NULL
  ensmember <- enslist[i]
  fn <- paste0(ensloc, ensmember, '/stats/', 'JULES-ES-1p0_', ensmember, '_Annual_global.nc')
  try(nc <- nc_open(fn))
  if(!is.null(nc)){
    try(dat <- extractTimeseries(nc, variable))
    nc_close(nc)
  }
  datmat[i, ] <- dat
}
datmat
}
nstart <- 499
nend <- 798
# Wave01 timeseries: load from cache if present, otherwise extract per variable.
# NOTE(review): this checks the SAME cache file as the wave00 block above, but
# saves differently-named (*_wave01) variables - whichever block writes first
# determines the file contents, and a load here would NOT define the *_wave01
# names if the file came from the wave00 block; confirm.
if (file.exists("ensemble_timeseries_wave01.rdata")) {
load("ensemble_timeseries_wave01.rdata")
} else {
# primary carbon cycle outputs
# Fluxes divided by (1e12/ysec), stocks by 1e12 - presumably GtC/year and GtC.
npp_ens_wave01 <- makeTimeseriesEnsemble(ensloc = ensloc,nstart = nstart, nend = nend, variable = "npp_nlim_lnd_sum") / (1e12/ysec)
nbp_ens_wave01 <- makeTimeseriesEnsemble(ensloc = ensloc,nstart = nstart, nend = nend,variable = "nbp_lnd_sum") / (1e12/ysec)
cSoil_ens_wave01 <- makeTimeseriesEnsemble(ensloc = ensloc,nstart = nstart, nend = nend,variable = "cSoil_lnd_sum") / 1e12
cVeg_ens_wave01 <- makeTimeseriesEnsemble(ensloc = ensloc,nstart = nstart, nend = nend,variable = "cVeg_lnd_sum") / 1e12
#
#
lai_lnd_mean_ens_wave01 <- makeTimeseriesEnsemble(ensloc = ensloc,nstart = nstart, nend = nend,variable = "lai_lnd_mean")
#
# # fluxes
rh_lnd_sum_ens_wave01 <- makeTimeseriesEnsemble(ensloc = ensloc,nstart = nstart, nend = nend, variable = "rh_lnd_sum") / (1e12/ysec)
fLuc_lnd_sum_ens_wave01 <- makeTimeseriesEnsemble(ensloc = ensloc,nstart = nstart, nend = nend, variable = "fLuc_lnd_sum") / (1e12/ysec)
fHarvest_lnd_sum_ens_wave01 <- makeTimeseriesEnsemble(ensloc = ensloc,nstart = nstart, nend = nend, variable = "fHarvest_lnd_sum") / (1e12/ysec)
#
#
# # fractions
treeFrac_lnd_mean_ens_wave01 <- makeTimeseriesEnsemble(ensloc = ensloc,nstart = nstart, nend = nend, variable = "treeFrac_lnd_mean")
shrubFrac_lnd_mean_ens_wave01 <- makeTimeseriesEnsemble(ensloc = ensloc,nstart = nstart, nend = nend, variable = "shrubFrac_lnd_mean")
baresoilFrac_lnd_mean_ens_wave01 <- makeTimeseriesEnsemble(ensloc = ensloc,nstart = nstart, nend = nend, variable = "baresoilFrac_lnd_mean")
save(npp_ens_wave01,
nbp_ens_wave01,
cSoil_ens_wave01,
cVeg_ens_wave01,
lai_lnd_mean_ens_wave01,
rh_lnd_sum_ens_wave01,
fLuc_lnd_sum_ens_wave01,
fHarvest_lnd_sum_ens_wave01,
treeFrac_lnd_mean_ens_wave01,
shrubFrac_lnd_mean_ens_wave01,
baresoilFrac_lnd_mean_ens_wave01,
file = "ensemble_timeseries_wave01.rdata" )
}
# Line colours for the timeseries plots: wave00 (blue), wave01 (red), the
# level-2-constrained wave01 subset (gold), and the standard member (black).
lcol_wave0 <- makeTransparent('dodgerblue2', 120)
lcol_wave01 <- makeTransparent('firebrick', 120)
lcol_wave01_level2 <- 'gold'
stancol = 'black'
linePlotMultiEns <- function(years, ens1, ens2, ens3, col1, col2, col3, ylab, main, ylim = NULL){
# Plot three ensembles of timeseries (rows = members, columns = years) over
# one another: ens1 is drawn first, then ens2 and ens3 on top.
#
# Args:
#   years:            x-axis values, length = ncol of the ensemble matrices.
#   ens1, ens2, ens3: members x years matrices.
#   col1, col2, col3: line colours for the three ensembles.
#   ylab, main:       axis label and plot title.
#   ylim:             optional y limits; if NULL they are computed from the
#                     FIRST and LAST timestep of each ensemble only (a cheap
#                     approximation to the full range).
# Returns: NULL (invisibly, via matlines); called for its plotting side effect.
nt <- length(years)
if(is.null(ylim)){
  # Fix: removed the no-op `else ylim <- ylim` branch and used `<-`.
  ylim <- range(c(ens1[, 1], ens1[, nt], ens2[, 1], ens2[, nt], ens3[, 1], ens3[, nt]))
}
matplot(years, t(ens1), type = 'l', lty = 'solid', ylim = ylim, col = col1,
        ylab = ylab, main = main, xlab = '',
        bty = 'n')
matlines(years, t(ens2), col = col2, lty = 'solid')
matlines(years, t(ens3), col = col3, lty = 'solid')
}
# 3x4 grid of timeseries panels: wave00 (blue), wave01 (red), level-2 wave01
# (gold), with the standard member overlaid in black on each panel.
par(mfrow= c(3,4), las = 1)
linePlotMultiEns(years = years, ens1 = npp_ens, ens2 = npp_ens_wave01, ens3 = npp_ens_wave01[level2_ix_wave01, ],
col1 = lcol_wave0, col2 = lcol_wave01, col3 = lcol_wave01_level2,
ylab = 'GtC', main = 'NPP')
lines(years,npp_stan, col = stancol, lty = 'solid', lwd = 2)
linePlotMultiEns(years = years, ens1 = nbp_ens, ens2 = nbp_ens_wave01, ens3 = nbp_ens_wave01[level2_ix_wave01, ],
col1 = lcol_wave0, col2 = lcol_wave01,col3 = lcol_wave01_level2,
ylab = 'GtC', main = 'NBP', ylim = c(-10,10))
lines(years, nbp_stan, col = stancol, lty = 'solid', lwd = 2)
# cSoil: y limits fixed from the wave00 first/last years to tame outliers.
linePlotMultiEns(years = years, ens1 = cSoil_ens, ens2 = cSoil_ens_wave01, ens3 = cSoil_ens_wave01[level2_ix_wave01, ],
col1 = lcol_wave0, col2 = lcol_wave01, col3 = lcol_wave01_level2,
ylab = 'GtC', main = 'cSoil', ylim = range(c(cSoil_ens[,1], cSoil_ens[,164])))
lines(years, cSoil_stan, col = stancol, lty = 'solid', lwd = 2)
linePlotMultiEns(years = years, ens1 = cVeg_ens, ens2 = cVeg_ens_wave01, ens3 = cVeg_ens_wave01[level2_ix_wave01, ],
col1 = lcol_wave0, col2 = lcol_wave01, col3 = lcol_wave01_level2,
ylab = 'GtC', main = 'cVeg')
lines(years, cVeg_stan, col = stancol, lty = 'solid', lwd = 2)
# NOTE(review): the ylab 'GtC' is reused for the lai and *frac panels below,
# where it is presumably not the right unit - confirm.
linePlotMultiEns(years = years, ens1 = lai_lnd_mean_ens, ens2 = lai_lnd_mean_ens_wave01, ens3 = lai_lnd_mean_ens_wave01[level2_ix_wave01, ],
col1 = lcol_wave0, col2 = lcol_wave01, col3 = lcol_wave01_level2,
ylab = 'GtC', main = 'Lai')
lines(years, lai_lnd_mean_stan, col = stancol, lty = 'solid', lwd = 2)
linePlotMultiEns(years = years, ens1 = rh_lnd_sum_ens, ens2 = rh_lnd_sum_ens_wave01, ens3 = rh_lnd_sum_ens_wave01[level2_ix_wave01, ],
col1 = lcol_wave0, col2 = lcol_wave01, col3 = lcol_wave01_level2,
ylab = 'GtC', main = 'RH')
lines(years, rh_lnd_sum_stan, col = stancol, lty = 'solid', lwd = 2)
linePlotMultiEns(years = years, ens1 = fLuc_lnd_sum_ens, ens2 = fLuc_lnd_sum_ens_wave01, ens3 = fLuc_lnd_sum_ens_wave01[level2_ix_wave01, ],
col1 = lcol_wave0, col2 = lcol_wave01, col3 = lcol_wave01_level2,
ylab = 'GtC', main = 'fLuc')
lines(years, fLuc_lnd_sum_stan, col = stancol, lty = 'solid', lwd = 2)
linePlotMultiEns(years = years, ens1 = fHarvest_lnd_sum_ens, ens2 = fHarvest_lnd_sum_ens_wave01,
ens3 = fHarvest_lnd_sum_ens_wave01[level2_ix_wave01, ],
col1 = lcol_wave0, col2 = lcol_wave01, col3 = lcol_wave01_level2,
ylab = 'GtC', main = 'fHarvest')
lines(years, fHarvest_lnd_sum_stan, col = stancol, lty = 'solid', lwd = 2)
linePlotMultiEns(years = years, ens1 = treeFrac_lnd_mean_ens, ens2 = treeFrac_lnd_mean_ens_wave01,
ens3 = treeFrac_lnd_mean_ens_wave01[level2_ix_wave01, ],
col1 = lcol_wave0, col2 = lcol_wave01, col3 = lcol_wave01_level2,
ylab = 'GtC', main = 'treefrac'
)
lines(years, treeFrac_lnd_mean_stan, col = stancol, lty = 'solid', lwd = 2)
# Shrub fraction panel.
linePlotMultiEns(years = years, ens1 = shrubFrac_lnd_mean_ens, ens2 = shrubFrac_lnd_mean_ens_wave01,
                 # Fix: ens3 previously subset the WAVE00 ensemble
                 # (shrubFrac_lnd_mean_ens) with wave01 level-2 indices, which
                 # selects the wrong members. Use the wave01 ensemble, matching
                 # every other panel in this figure.
                 ens3 = shrubFrac_lnd_mean_ens_wave01[level2_ix_wave01, ],
                 col1 = lcol_wave0, col2 = lcol_wave01, col3 = lcol_wave01_level2,
                 ylab = 'GtC', main = 'shrubfrac'
)
lines(years, shrubFrac_lnd_mean_stan, col = stancol, lty = 'solid', lwd = 2)
# Bare soil fraction panel.
linePlotMultiEns(years = years, ens1 = baresoilFrac_lnd_mean_ens, ens2 = baresoilFrac_lnd_mean_ens_wave01,
ens3 = baresoilFrac_lnd_mean_ens_wave01[level2_ix_wave01, ],
col1 = lcol_wave0, col2 = lcol_wave01, col3 = lcol_wave01_level2,
ylab = 'GtC', main = 'baresoilfrac')
lines(years, baresoilFrac_lnd_mean_stan, col = stancol, lty = 'solid', lwd = 2)
# Reset the plotting region, then draw a shared legend over the whole device.
reset()
legend('bottomright', legend = c('wave00','wave01','wave01 level2','standard'), lty = 'solid', lwd = 1.5, col = c(lcol_wave0, lcol_wave01, lcol_wave01_level2, stancol), inset = c(0.05, 0.15) )
NA
NA
This is a plot of timeseries of Wave00, Wave01, and level2-constrained wave01 on top of one another. We see that the wave01 is closer to the standard than wave00, and the level-2 constrained wave01 ensemble is often closer again. However, there are still quite large discrepancies. For example, baresoilfrac is often way too high, shrubfrac is often too low (though both these span the standard). Treefrac is away from zero, but still often too low or too high. While fHarvest looks good, fLuc does not appear constrained by the process at all. RH (soil respiration) looks well constrained, whereas lai is often too low.
One thing we could do next is constrain input space again, using observations or “tolerance to error” on some or all of these outputs.
We could also extend sensitivity analysis to work out what controls e.g. treefrac.
We hope that running the new ensemble gives us a better emulator, and allows us to rule out more input space. We particularly hope that the emulator is better for those members that are inside AW’s constraints.
First, we can look at the emulator errors in two cases: the level1a data (a basic carbon cycle), and then with the Wave01 data added, which should have similar characteristics. (We should have eliminated really bad simulations, but the wave01 outputs are not constrained to fall perfectly within AW’s ranges.)
#fit_list_const_level1a <- createKmFitList(X = X_level1a, Y = Y_const_level1a_scaled)
# Fit one km (kriging) emulator per constraint output of the wave00 (level1a)
# ensemble, in parallel across outputs.
Y_const_level1a_scaled_list <- mat2list(Y_const_level1a_scaled)
fit_list_const_level1a <- mclapply(X = Y_const_level1a_scaled_list, FUN = km, formula = ~., design = X_level1a,
mc.cores = 4, control = list(trace = FALSE))
# Bind together the input matrices and scaled output data
# NOTE(review): row 440 of the combined data is hard-coded out as an outlier
# (identified via the which.max/which.min checks below) - revisit if the
# inputs ever change.
X_level1a_wave01 <- rbind(X_level1a, X_wave01_train)[-440, ]
Y_const_level1a_wave01_scaled <- rbind(Y_const_level1a_scaled, Y_const_wave01_scaled)[-440, ]
apply(Y_const_level1a_wave01_scaled ,2, which.max)
nbp_lnd_sum npp_nlim_lnd_sum cSoil_lnd_sum cVeg_lnd_sum
583 213 317 312
apply(Y_const_level1a_wave01_scaled ,2, which.min)
nbp_lnd_sum npp_nlim_lnd_sum cSoil_lnd_sum cVeg_lnd_sum
314 243 243 243
Found the outlier - looks like it’s 440
findOutliers <- function(x, sds = 6){
  # Indices of elements of x lying more than `sds` standard deviations from
  # the mean of x.
  #
  # Args:
  #   x:   numeric vector.
  #   sds: threshold in standard deviations (default 6).
  # Returns: integer vector of outlier indices (integer(0) if none).
  # Fix: the original ended with `ix <- which(...)`, returning the value
  # invisibly via assignment; return the expression directly.
  which(abs(x - mean(x)) > (sds * sd(x)))
}
# Confirm no remaining extreme outliers (10 sd) in the combined data.
apply(Y_const_level1a_wave01_scaled ,2, findOutliers, sds = 10)
integer(0)
# Create fit lists for the combined data
Y_const_level1a_wave01_scaled_list <- mat2list(Y_const_level1a_wave01_scaled)
fit_list_const_level1a_wave01 <- mclapply(X = Y_const_level1a_wave01_scaled_list , FUN = km, formula = ~., design = X_level1a_wave01,
mc.cores = 4, control = list(trace = FALSE))
# Leave-one-out predictions and summary statistics for both emulator sets.
loolist_km_Y_level1a <- mclapply(X = fit_list_const_level1a, FUN = leaveOneOut.km, type = 'UK', trend.reestim = TRUE)
loolist_km_Y_level1a_wave01 <- mclapply(X = fit_list_const_level1a_wave01, FUN = leaveOneOut.km, type = 'UK', trend.reestim = TRUE)
loostats_km_Y_level1a <- lapply(fit_list_const_level1a, FUN = kmLooStats)
loostats_km_Y_level1a_wave01 <- lapply(fit_list_const_level1a_wave01, FUN = kmLooStats)
The top row shows the leave-one-out prediction accuracy of the original wave00 ensemble, and the lower row the entire wave00 AND wave01 ensemble combined.
# LOO accuracy plots: predicted vs actual with +/- 2 sd error bars; a perfect
# emulator lies on the 1:1 line. First the wave00-only emulators.
#pdf(file = 'figs/kmloostats_Y_level1a.pdf', width = 12, height = 12)
par(mfrow = c(2,4), mar = c(3,4,2,2), oma = c(4,4,4,0.1))
for(i in 1:length(loolist_km_Y_level1a)){
y <- Y_const_level1a_scaled[, i]
loo <- loolist_km_Y_level1a[[i]]
ylim <- range(c(loo$mean - (2*loo$sd), loo$mean + (2*loo$sd)) )
plot(y, loo$mean, xlab = '', ylab = '', main = '' , ylim = ylim, col = makeTransparent(wave00col, 100),
pch = 19)
segments(x0 = y, y0 = loo$mean - (2*loo$sd) , x1 = y , y1 = loo$mean + (2*loo$sd), col = makeTransparent(wave00col, 50))
abline(0,1)
legend('topleft', legend = colnames(Y_const_level1a_scaled)[i], bty = 'n', text.font = 2 )
legend('bottomright',legend = paste('pmae =',round(loostats_km_Y_level1a[[i]]$pmae,2),'%') , bty = 'n', text.font = 2)
}
mtext('Actual', side = 1, line = 1, outer = TRUE, cex = 2 )
mtext('Predicted', side = 2, line = 0, outer = TRUE, cex = 2)
mtext('Level 1a ensemble outputs', side = 3, line = 0, outer = TRUE, cex = 2)
#dev.off()
#pdf(file = 'figs/kmloostats_Y_level1a.pdf', width = 12, height = 12)
# Combined wave00+wave01 emulators, drawn into the remaining panels of the
# 2x4 layout set up above (no fresh par() call).
for(i in 1:length(loolist_km_Y_level1a)){
y <- Y_const_level1a_wave01_scaled[, i]
loo <- loolist_km_Y_level1a_wave01[[i]]
ylim <- range(c(loo$mean - (2*loo$sd), loo$mean + (2*loo$sd)) )
plot(y, loo$mean, xlab = '', ylab = '', main = '' , ylim = ylim, col = makeTransparent(wave01col, 100),
pch = 19)
segments(x0 = y, y0 = loo$mean - (2*loo$sd) , x1 = y , y1 = loo$mean + (2*loo$sd), col = makeTransparent(wave01col, 100))
abline(0,1)
legend('topleft', legend = colnames(Y_const_level1a_scaled)[i], bty = 'n', text.font = 2 )
legend('bottomright',legend = paste('pmae =',round(loostats_km_Y_level1a_wave01[[i]]$pmae,2),'%') , bty = 'n', text.font = 2)
}
mtext('Actual', side = 1, line = 1, outer = TRUE, cex = 2 )
mtext('Predicted', side = 2, line = 0, outer = TRUE, cex = 2)
mtext('Level 1a ensemble outputs', side = 3, line = 0, outer = TRUE, cex = 2)
# remove to level 1a Relative to toplevel_ix - useful for plotting etc.
#toplevel_to_level_1a_ix <-(toplevel_ix[-Y_nlevel0_ix])[level1a_ix]
# So further constraining to level 2 can be associated back to the top level.
# Level-2 (AW) constraint indices: for the wave00 level1a ensemble, and for
# the combined wave00+wave01 data.
# NOTE(review): as in the wave01-only version, NBP has no upper bound here
# although AW_constraints specifies a max of 100 - confirm intentional.
level2_ix <- which(Y_const_level1a_scaled[,'nbp_lnd_sum'] > 0 &
Y_const_level1a_scaled[,'npp_nlim_lnd_sum'] > 35 & Y_const_level1a_scaled[,'npp_nlim_lnd_sum'] < 80 &
Y_const_level1a_scaled[,'cSoil_lnd_sum'] > 750 & Y_const_level1a_scaled[,'cSoil_lnd_sum'] < 3000 &
Y_const_level1a_scaled[,'cVeg_lnd_sum'] > 300 & Y_const_level1a_scaled[,'cVeg_lnd_sum'] < 800
)
level2_ix_level1a_wave01 <- which(Y_const_level1a_wave01_scaled[,'nbp_lnd_sum'] > 0 &
Y_const_level1a_wave01_scaled[,'npp_nlim_lnd_sum'] > 35 & Y_const_level1a_wave01_scaled[,'npp_nlim_lnd_sum'] < 80 &
Y_const_level1a_wave01_scaled[,'cSoil_lnd_sum'] > 750 & Y_const_level1a_wave01_scaled[,'cSoil_lnd_sum'] < 3000 &
Y_const_level1a_wave01_scaled[,'cVeg_lnd_sum'] > 300 & Y_const_level1a_wave01_scaled[,'cVeg_lnd_sum'] < 800
)
We see that the error stats for some of the outputs from wave01 are worse, but there are many more ensemble members that lie within the constraints for wave 01.
“pmae” is “proportional mean absolute error”, which is the mean absolute error expressed as a percentage of the original (minimally constrained) ensemble range in that output.
# LOO accuracy restricted to level-2 members: wave00 emulators first.
# NOTE(review): the pmae shown in each panel is the full-ensemble statistic,
# not a subset statistic (see kmLooStatsSubset below) - confirm intended.
#pdf(file = 'figs/kmloostats_Y_level1a.pdf', width = 12, height = 12)
par(mfrow = c(2,4), mar = c(3,4,2,2), oma = c(4,4,4,0.1))
for(i in 1:length(loolist_km_Y_level1a)){
y <- Y_const_level1a_scaled[level2_ix, i]
loo <- loolist_km_Y_level1a[[i]]
ylim <- range(c(loo$mean[level2_ix] - (2*loo$sd[level2_ix]), loo$mean[level2_ix] + (2*loo$sd[level2_ix])) )
plot(y, loo$mean[level2_ix], xlab = '', ylab = '', main = '' , ylim = ylim, col = makeTransparent(wave00col, 100),
pch = 19)
segments(x0 = y, y0 = loo$mean[level2_ix] - (2*loo$sd[level2_ix]) , x1 = y , y1 = loo$mean[level2_ix] + (2*loo$sd[level2_ix]), col = makeTransparent(wave00col, 100))
abline(0,1)
legend('topleft', legend = colnames(Y_const_level1a_scaled)[i], bty = 'n', text.font = 2 )
legend('bottomright',legend = paste('pmae =',round(loostats_km_Y_level1a[[i]]$pmae,2),'%') , bty = 'n', text.font = 2)
}
#dev.off()
#pdf(file = 'figs/kmloostats_Y_level1a.pdf', width = 12, height = 12)
# Combined wave00+wave01 emulators at their own level-2 members, in the
# remaining panels of the layout above.
for(i in 1:length(loolist_km_Y_level1a)){
y <- Y_const_level1a_wave01_scaled[level2_ix_level1a_wave01, i]
loo <- loolist_km_Y_level1a_wave01[[i]]
ylim <- range(c(loo$mean[level2_ix_level1a_wave01] - (2*loo$sd[level2_ix_level1a_wave01]), loo$mean[level2_ix_level1a_wave01] + (2*loo$sd[level2_ix_level1a_wave01])) )
plot(y, loo$mean[level2_ix_level1a_wave01], xlab = '', ylab = '', main = '' , ylim = ylim, col = makeTransparent(wave01col, 100),
pch = 19)
segments(x0 = y, y0 = loo$mean[level2_ix_level1a_wave01] - (2*loo$sd[level2_ix_level1a_wave01]) , x1 = y , y1 = loo$mean[level2_ix_level1a_wave01] + (2*loo$sd[level2_ix_level1a_wave01]), col = makeTransparent(wave01col, 50))
abline(0,1)
legend('topleft', legend = colnames(Y_const_level1a_scaled)[i], bty = 'n', text.font = 2 )
legend('bottomright',legend = paste('pmae =',round(loostats_km_Y_level1a_wave01[[i]]$pmae,2),'%') , bty = 'n', text.font = 2)
}
mtext('Actual', side = 1, line = 1, outer = TRUE, cex = 2 )
mtext('Predicted', side = 2, line = 0, outer = TRUE, cex = 2)
mtext('Level 2 constrained ensemble outputs', side = 3, line = 0, outer = TRUE, cex = 2)
This gives us an idea of how good the emulator is where it really matters, and as the members are consistent, gives us a fairer idea of whether the emulators have improved with more members.
Good news is, the emulators are more accurate for wave01.
kmLooStatsSubset <- function (km, ix, type = "UK")
{
  # Leave-one-out summary statistics for a SUBSET (ix) of the design points of
  # a DiceKriging km fit.
  #
  # Args:
  #   km:   a fitted DiceKriging model.
  #   ix:   indices of the design points to summarise.
  #   type: kriging type passed to leaveOneOut.km (default "UK").
  # Returns: list(loo, mae, rmse, pmae, maxerr) where loo is the full
  #          leave-one-out object and the statistics cover only ix.
  loo <- leaveOneOut.km(km, type = type, trend.reestim = TRUE)
  preddiff <- loo$mean[ix] - km@y[ix]
  mae <- mean(abs(preddiff))
  rmse <- sqrt(mean(preddiff^2))
  # NOTE(review): maxerr is the largest SIGNED error, not the largest absolute
  # error - confirm this matches the convention in kmLooStats.
  maxerr <- max(preddiff)
  # pmae is normalised by the range of the FULL design response (km@y), not
  # the subset, matching the "original ensemble range" definition in the text.
  absdiff <- abs(diff(range(km@y)))
  pmae <- (mae/absdiff) * 100
  # Fix: rmse was computed but silently dropped from the return value;
  # include it (a backward-compatible addition to the list).
  return(list(loo = loo, mae = mae, rmse = rmse, pmae = pmae, maxerr = maxerr))
}
# Subset every leaf of each LOO object (mean, sd, ...) to the level-2 members.
# level2_ix indexes rows of the level1a ensemble; because the combined data
# rbinds level1a first, the same indices select the same members there.
# NOTE(review): the combined data drop row 440 - if any level2_ix >= 440 the
# combined-fit indices would be offset by one; confirm.
loolist_km_Y_level1a_level2 <- rapply(loolist_km_Y_level1a, f = function(x) x[level2_ix], how = "list")
loolist_km_Y_level1a_wave01_level2 <- rapply(loolist_km_Y_level1a_wave01, f = function(x) x[level2_ix], how = "list")
# Overlay plot: wave00 and combined-fit LOO predictions at the same level-2
# members, predicted vs actual with +/- 2 sd bars.
#pdf(file = 'figs/kmloostats_Y_level1a.pdf', width = 12, height = 12)
par(mfrow = c(2,2), mar = c(3,4,2,2), oma = c(4,4,4,0.1))
for(i in 1:length(loolist_km_Y_level1a_level2)){
  # wave00 emulator at the level-2 members.
  y <- Y_const_level1a_scaled[level2_ix, i]
  loo <- loolist_km_Y_level1a_level2[[i]]
  ylim <- range(c(loo$mean - (2*loo$sd), loo$mean + (2*loo$sd)))
  plot(y, loo$mean, xlab = '', ylab = '', main = '', ylim = ylim,
       col = makeTransparent(wave00col, 250), pch = 19)
  arrows(x0 = y, y0 = loo$mean - (2*loo$sd), x1 = y, y1 = loo$mean + (2*loo$sd),
         col = makeTransparent(wave00col, 150), angle = 90, code = 3, length = 0.03)
  # Combined (wave00+wave01) emulator at the same members.
  y1 <- Y_const_level1a_wave01_scaled[level2_ix, i]
  loo <- loolist_km_Y_level1a_wave01_level2[[i]]
  points(y1, loo$mean, col = makeTransparent(wave01col, 250), pch = 19)
  # Fix: the wave01 error bars were anchored at the wave00 x-values
  # (x0 = y, x1 = y); anchor them at y1 so they sit under the wave01 points.
  arrows(x0 = y1, y0 = loo$mean - (2*loo$sd), x1 = y1, y1 = loo$mean + (2*loo$sd),
         col = makeTransparent(wave01col, 250), angle = 90, code = 3, length = 0.03)
  abline(0, 1)
  legend('topleft', legend = colnames(Y_const_level1a_scaled)[i], bty = 'n', text.font = 2)
  # NOTE(review): pmae shown is the wave00 full-ensemble statistic; consider
  # the subset statistics (kmLooStatsSubset) instead.
  legend('bottomright', legend = paste('pmae =', round(loostats_km_Y_level1a[[i]]$pmae, 2), '%'), bty = 'n', text.font = 2)
}
mtext('Actual', side = 1, line = 1, outer = TRUE, cex = 2)
mtext('Predicted', side = 2, line = 0, outer = TRUE, cex = 2)
mtext('Level 2 wave 00 ensemble outputs', side = 3, line = 0, outer = TRUE, cex = 2)
reset()
legend('topleft', pch = 19, legend = c('wave00', 'wave01'), col = c(wave00col, wave01col), horiz = TRUE)
NA
NA
These leave-one-out prediction accuracy plots rank the ensemble members from largest underprediction to largest overprediction using the wave00 predictions. A perfect prediction would appear on the horizontal “zero” line.
Many of the wave01 predictions are closer to the horizontal line, and therefore more accurate predictions.
None of the predictions are outside the uncertainty bounds, which suggests they are overconservative (should be smaller).
# Ranked LOO error plots: members sorted from largest underprediction to
# largest overprediction by the wave00 predictions; both waves drawn in that
# order with +/- 2 sd bars. Note the sign convention here is actual minus
# predicted (preddiff = y - loo$mean), opposite to kmLooStatsSubset.
#pdf(file = 'figs/kmloostats_Y_level1a.pdf', width = 12, height = 12)
par(mfrow = c(4,1), mar = c(3,4,2,2), oma = c(4,4,4,0.1))
for(i in 1:length(loolist_km_Y_level1a_level2)){
y <- Y_const_level1a_scaled[level2_ix, i]
loo_00 <- loolist_km_Y_level1a_level2[[i]]
loo_01 <- loolist_km_Y_level1a_wave01_level2[[i]]
preddiff_wave00 <- y - loo_00$mean
preddiff_wave01 <- y - loo_01$mean
# rank by the original wave 00 predictions
loo_rank_ix <- sort(preddiff_wave00 , index.return = TRUE)
ylim <- range(c(preddiff_wave00[loo_rank_ix$ix] - (2*loo_00$sd[loo_rank_ix$ix]),
preddiff_wave00[loo_rank_ix$ix] + (2*loo_00$sd[loo_rank_ix$ix]),
preddiff_wave01[loo_rank_ix$ix] - (2*loo_01$sd[loo_rank_ix$ix]),
preddiff_wave01[loo_rank_ix$ix] + (2*loo_01$sd[loo_rank_ix$ix])
)
)
plot(preddiff_wave00[loo_rank_ix$ix], xlab = '', ylab = '', main = '' , col = makeTransparent(wave00col, 255),
pch = 19, ylim = ylim)
abline(h = 0)
arrows(x0 = 1:length(y), y0 = preddiff_wave00[loo_rank_ix$ix] - (2*loo_00$sd[loo_rank_ix$ix]) , x1 = 1:length(y) , y1 = preddiff_wave00[loo_rank_ix$ix] + (2*loo_00$sd[loo_rank_ix$ix]), col = makeTransparent(wave00col, 150), angle = 90, code = 3, length = 0.03)
points(preddiff_wave01[loo_rank_ix$ix], xlab = '', ylab = '', main = '' , col = makeTransparent(wave01col, 255),
pch = 19)
arrows(x0 = 1:length(y), y0 = preddiff_wave01[loo_rank_ix$ix] - (2*loo_01$sd[loo_rank_ix$ix]) , x1 = 1:length(y) , y1 = preddiff_wave01[loo_rank_ix$ix] + (2*loo_01$sd[loo_rank_ix$ix]), col = makeTransparent(wave01col, 150), angle = 90, code = 3, length = 0.03)
mtext(colnames(Y_const_level1a_scaled)[i], side = 3, adj = 0, line = 1)
}
reset()
legend('topleft', pch = 19, legend = c('wave00', 'wave01'), col = c(wave00col, wave01col ), horiz = TRUE)
# Subset LOO statistics restricted to the level-2 members, for both fits.
loostats_km_Y_level1a_sub <- lapply(fit_list_const_level1a, FUN = kmLooStatsSubset, ix = level2_ix)
loostats_km_Y_level1a_wave01_sub <- lapply(fit_list_const_level1a_wave01, FUN = kmLooStatsSubset, ix = level2_ix)
Looking at the proportional mean absolute error (pmae), expressed in percent, we can see that it doesn’t improve much for the whole ensemble, but does improve significantly for the subset of ensemble members that fall within AW’s constraints from the first ensemble (marked "_sub").
# Collect pmae (%) for each output under four cases: wave00 and combined
# emulators, over the full ensemble and over the level-2 subset ("_sub").
pmae_wave00 <- lapply(loostats_km_Y_level1a, FUN = function(x) x$pmae )
pmae_wave01 <- lapply(loostats_km_Y_level1a_wave01, FUN = function(x) x$pmae )
pmae_wave00_sub <- lapply(loostats_km_Y_level1a_sub, FUN = function(x) x$pmae )
pmae_wave01_sub <- lapply(loostats_km_Y_level1a_wave01_sub, FUN = function(x) x$pmae )
# cbind of lists gives a list-matrix; fine for printing.
pmae_table <- cbind(pmae_wave00, pmae_wave01, pmae_wave00_sub, pmae_wave01_sub)
print(pmae_table)
pmae_wave00 pmae_wave01 pmae_wave00_sub pmae_wave01_sub
[1,] 5.084153 4.927634 7.17902 4.913062
[2,] 4.282042 4.007528 4.804363 4.085545
[3,] 3.597314 3.790255 4.555892 3.833982
[4,] 4.240908 4.515949 4.814 3.226669
obscol = 'purple'
wave01_level2col <- 'gold'
# Implied atmospheric growth per member: fossil emissions minus ocean sink
# minus the member's land sink (NBP). resid_ix presumably selects the Global
# Carbon Budget years matching the model columns - confirm alignment.
ag <- matrix(nrow = nrow(nbp_ens), ncol = ncol(nbp_ens))
ag01 <- matrix(nrow = nrow(nbp_ens_wave01), ncol = ncol(nbp_ens_wave01))
for(i in 1:nrow(nbp_ens)){
ag[i, ] <- historical_carbon_budget$`fossil emissions excluding carbonation`[resid_ix] - historical_carbon_budget$`ocean sink`[resid_ix] - nbp_ens[i, ]
}
for(i in 1:nrow(nbp_ens_wave01)){
ag01[i, ] <- historical_carbon_budget$`fossil emissions excluding carbonation`[resid_ix] - historical_carbon_budget$`ocean sink`[resid_ix] - nbp_ens_wave01[i, ]
}
# Same calculation for the standard member.
ag_stan <- historical_carbon_budget$`fossil emissions excluding carbonation`[resid_ix] - historical_carbon_budget$`ocean sink`[resid_ix] - nbp_stan
#pdf(width = 8, height = 6, file = 'ag.pdf')
matplot(years, t(ag), type = 'l', lty = 'solid',ylim = c(-2, 10), col = makeTransparent(wave00col,100),
ylab = 'GtC', main = 'atmospheric growth', xlab = '',
bty = 'n', xlim = c(1960, 2013))
matlines(years, t(ag01), col = makeTransparent(wave01col,100), lty = 'solid')
matlines(years, t(ag01[level2_ix_wave01, ]), col = makeTransparent(wave01_level2col, 100) , lty = 'solid')
# Observed atmospheric growth (GCB) and the standard member, for reference.
lines(historical_carbon_budget$Year, historical_carbon_budget$`atmospheric growth`, col = obscol, lwd =2)
lines(years, ag_stan, col = stancol, lwd =2)
legend('topleft', legend = c('wave00', 'wave01', 'wave01 constrained', 'standard', 'GCB'), col = c(wave00col, wave01col, wave01_level2col, stancol, obscol), lty = 'solid', lwd = 2)
#dev.off()
cumulative_nbp_ens <- t(apply(nbp_ens, 1, cumsum))
cumulative_nbp_ens_wave01 <- t(apply(nbp_ens_wave01, 1, cumsum))
cumulative_nbp_stan <- cumsum(nbp_stan)
matplot(years, t(cumulative_nbp_ens), type = 'l', lty = 'solid',ylim = c(-130, 250), col = makeTransparent(wave00col,150),
ylab = 'GtC', main = 'cumulative NBP', xlab = '',
bty = 'n', xlim = c(1850, 2020))
matlines(years, t(cumulative_nbp_ens_wave01),
col = makeTransparent(wave01col,150),
lty = 'solid')
matlines(years, t(cumulative_nbp_ens_wave01[level2_ix_wave01, ]),
col = makeTransparent(wave01_level2col, 150),
lty = 'solid')
lines(years, cumulative_nbp_stan, col = makeTransparent(stancol, 200), lty = 'solid', lwd = 1.5 )
legend('topleft', legend = c('wave00', 'wave01', 'wave01 constrained', 'standard'), col = c(wave00col, wave01col, wave01_level2col, stancol), lty = 'solid', lwd = 1.5)
# Cumulative NBP at the end of the run.
# Mean over columns 135:164 of the run (the 1984-2013 window, per the axis
# labels used below). rowMeans() replaces apply(x, 1, mean): identical
# result, idiomatic and faster.
cnbp_modern_ens <- rowMeans(cumulative_nbp_ens[, 135:164])
cnbp_modern_wave01 <- rowMeans(cumulative_nbp_ens_wave01[, 135:164])
cnbp_modern_stan <- mean(cumulative_nbp_stan[135:164])
Calculate the atmospheric growth rate of 1984–2013 using a simple linear fit.
# agr = atmospheric growth rate (slope of a linear trend over the window)
# agiav = atmospheric growth interannual variability (sd of the residuals)
# The same per-member linear fit is needed for both waves, so factor it
# into a helper rather than duplicating the loop body; seq_len() replaces
# the 1:nrow() anti-pattern.
fit_ag_trend <- function(ag_mat, ix = 135:164) {
  n <- nrow(ag_mat)
  agr <- rep(NA, length = n)
  agiav <- rep(NA, length = n)
  for (i in seq_len(n)) {
    dat <- data.frame(t = seq_along(ix), ag = ag_mat[i, ix])
    fit <- lm(ag ~ t, data = dat)
    agr[i] <- coef(fit)['t']
    agiav[i] <- sd(fit$residuals)
  }
  list(agr = agr, agiav = agiav)
}

trend_wave00 <- fit_ag_trend(ag)
agr_modern_ens <- trend_wave00$agr
agiav_modern_ens <- trend_wave00$agiav

trend_wave01 <- fit_ag_trend(ag01)
agr_modern_wave01 <- trend_wave01$agr
agiav_modern_wave01 <- trend_wave01$agiav

# Standard member: a single fit over the same 30-year window.
dat <- data.frame(t =1:30, ag = ag_stan[135:164])
agr_stan_fit <- lm(ag ~ t, data = dat)
agr_stan <- coef(agr_stan_fit)['t']
agiav_stan <- sd(agr_stan_fit$residuals)
# Scatter of modern atmospheric growth rate against cumulative NBP,
# with constrained wave01 members highlighted in a separate colour.
plot(agr_modern_ens, cnbp_modern_ens, col = makeTransparent(wave00col,150),
ylim = c(-120,220), pch = 19,
xlab = 'Atmospheric growth rate 1984-2014',
ylab = 'Cumulative NBP from preindustrial by 1984-2013'
)
# wave01 point colours: level-2-constrained members get the level2 colour.
cnbp_wave01_cols <- rep(wave01col, length(cnbp_modern_wave01))
cnbp_wave01_cols[level2_ix_wave01] <- wave01_level2col
points( agr_modern_wave01,cnbp_modern_wave01, col = makeTransparent(cnbp_wave01_cols, 150), pch = 19)
points( agr_stan,cnbp_modern_stan, col = stancol, pch = 19, cex = 1.5)
legend('topleft', legend = c('wave00', 'wave01', 'wave01 constrained', 'standard'), col = c(wave00col, wave01col, wave01_level2col, stancol), pch = 19)
# Correlations between growth rate and cumulative NBP are near zero.
print(c('correlation agr vs cnbp (all members)', cor(agr_modern_ens, cnbp_modern_ens)))
[1] "correlation agr vs cnbp (all members)" "-0.0407011946805412"
print(c('correlation agr vs cnbp (wave01)', cor(agr_modern_wave01, cnbp_modern_wave01)))
[1] "correlation agr vs cnbp (wave01)" "0.00334447281747124"
# Linear fits of growth rate on cumulative NBP, one per wave.
fit_wave00 <- lm(agr_modern_ens ~ cnbp_modern_ens)
fit_wave01 <- lm(agr_modern_wave01 ~ cnbp_modern_wave01)
print(summary(fit_wave00))
Call:
lm(formula = agr_modern_ens ~ cnbp_modern_ens)
Residuals:
Min 1Q Median 3Q Max
-0.047413 -0.003802 0.003838 0.006715 0.023090
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.114e-01 5.176e-04 215.173 <2e-16 ***
cnbp_modern_ens -1.379e-09 1.519e-09 -0.908 0.364
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.01155 on 497 degrees of freedom
Multiple R-squared: 0.001657, Adjusted R-squared: -0.0003522
F-statistic: 0.8247 on 1 and 497 DF, p-value: 0.3643
print(summary(fit_wave01))
Call:
lm(formula = agr_modern_wave01 ~ cnbp_modern_wave01)
Residuals:
Min 1Q Median 3Q Max
-4639 -4639 -4639 -4639 1382335
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.639e+03 4.639e+03 1.000 0.318
cnbp_modern_wave01 5.475e-29 9.483e-28 0.058 0.954
Residual standard error: 80210 on 298 degrees of freedom
Multiple R-squared: 1.119e-05, Adjusted R-squared: -0.003344
F-statistic: 0.003333 on 1 and 298 DF, p-value: 0.954
Interannual variability and cumulative NBP
(correlations are close to zero, especially in the later wave)
# Scatter of atmospheric-growth interannual variability vs cumulative NBP,
# same colour scheme as the growth-rate scatter above.
plot(agiav_modern_ens, cnbp_modern_ens, col = makeTransparent(wave00col,150),
ylim = c(-120,220), pch = 19,
xlab = 'Atmospheric growth IAV 1984-2014',
ylab = 'Cumulative NBP from preindustrial by 1984-2013'
)
points( agiav_modern_wave01, cnbp_modern_wave01, col = makeTransparent(cnbp_wave01_cols, 150), pch = 19)
points( agiav_stan,cnbp_modern_stan, col = stancol, pch = 19, cex = 1.5)
legend('bottomright', legend = c('wave00', 'wave01', 'wave01 constrained', 'standard'), col = c(wave00col, wave01col, wave01_level2col, stancol), pch = 19)
# Correlations between IAV and cumulative NBP (both essentially zero).
print(cor(agiav_modern_ens, cnbp_modern_ens))
[1] 0.03223847
print(cor( agiav_modern_wave01, cnbp_modern_wave01))
[1] 0.003344484
Using Atmospheric Growth Rate as an example, how close can we get the model to observations? Can we do better than standard? What are the trade-offs of doing so? How does getting close in AGR affect performance in other outputs?
# Observed atmospheric growth (GCB record) over the ensemble years.
ag_obs <- historical_carbon_budget$`atmospheric growth`[which(historical_carbon_budget$Year %in% years)]
# Departure of each member from observations: subtract the observed
# series (replicated row-wise) from every row of the ensemble matrix.
ag_err <- ag - matrix(ag_obs, nrow = nrow(ag), ncol = ncol(ag), byrow = TRUE)
ag01_err <- ag01 - matrix(ag_obs, nrow = nrow(ag01), ncol = ncol(ag01), byrow = TRUE)
# Restrict the comparison to the well-observed modern window.
long_modern_years <- 1960:2013
ag_modern_ix <- which(years %in% long_modern_years)
ag_err_modern <- ag_err[, ag_modern_ix]
ag01_err_modern <- ag01_err[, ag_modern_ix]
# Standard-run error over the full record and the modern window.
ag_err_stan <- ag_stan - ag_obs
ag_err_stan_modern <- ag_err_stan[ag_modern_ix]
# Time series of each member's atmospheric-growth error over the modern
# window. Fixed the typo in the plot title ('Amospheric' -> 'Atmospheric').
matplot(long_modern_years, t(ag_err_modern), type = 'l', lty = 'solid', col = makeTransparent(wave00col, 100), main = 'Atmospheric Growth Error')
errcol <- rep(wave01col, nrow(ag01))
#errcol[level2_ix_wave01] <- wave01_level2col
matlines(long_modern_years, t(ag01_err_modern), col = makeTransparent(errcol, 100), lty = 'solid')
# Constrained (level 2) wave01 members drawn last so they sit on top.
matlines(long_modern_years, t(ag01_err_modern[level2_ix_wave01, ]), col = makeTransparent(wave01_level2col,100), lty = 'solid')
lines(long_modern_years, ag_err_stan_modern, col = stancol)
legend('bottomleft', legend = c('wave00', 'wave01', 'wave01 constrained', 'standard'), col = c(wave00col, wave01col, wave01_level2col, stancol), lty = 'solid', lwd = 1.5)
# Zero-error reference line.
abline(h=0)
# Summary error statistics over the modern window, one value per member.
# rowMeans() replaces apply(x, 1, mean): identical result, clearer and
# avoids the per-row function-call overhead.
# Atmospheric growth mean error
ag_modern_me <- rowMeans(ag_err_modern)
ag01_modern_me <- rowMeans(ag01_err_modern)
ag_modern_me_stan <- mean(ag_err_stan[ag_modern_ix])
# Mean absolute error
ag_modern_mae <- rowMeans(abs(ag_err_modern))
ag01_modern_mae <- rowMeans(abs(ag01_err_modern))
ag_modern_mae_stan <- mean(abs(ag_err_stan[ag_modern_ix]))
# Root mean square error
ag_modern_rmse <- sqrt(rowMeans(ag_err_modern^2))
ag01_modern_rmse <- sqrt(rowMeans(ag01_err_modern^2))
ag_modern_rmse_stan <- sqrt(mean(ag_err_stan[ag_modern_ix]^2))
# There are some big outliers in the wave01 mean error.
outlier_ix_wave01 <- which(abs(ag01_modern_me) > 100)
# GUARD: in R, x[-integer(0)] selects NOTHING, so subsetting with
# -outlier_ix_wave01 would silently drop every member if which() found
# no outliers. Build the trimmed vectors explicitly instead.
if (length(outlier_ix_wave01) > 0) {
  ag01_me_trim <- ag01_modern_me[-outlier_ix_wave01]
  ag01_mae_trim <- ag01_modern_mae[-outlier_ix_wave01]
  ag01_rmse_trim <- ag01_modern_rmse[-outlier_ix_wave01]
} else {
  ag01_me_trim <- ag01_modern_me
  ag01_mae_trim <- ag01_modern_mae
  ag01_rmse_trim <- ag01_modern_rmse
}
# Paired histograms (wave00 on top, trimmed wave01 below), with the
# standard run's value marked as a rug on each panel.
par(mfrow = c(2,1))
hist(ag_modern_me, main = 'Atmospheric growth mean error', col = wave00col, xlim = c(-3, 3))
rug(ag_modern_me_stan, col = stancol, lwd = 2)
hist(ag01_me_trim, col = wave01col, xlim = c(-3,3), main = '')
rug(ag_modern_me_stan, col = stancol, lwd = 2)
par(mfrow = c(2,1))
hist(ag_modern_mae, main = 'Atmospheric growth mean absolute error', col = wave00col, xlim = c(0, 3))
rug(ag_modern_mae_stan, col = stancol, lwd = 2)
hist(ag01_mae_trim, col = wave01col, lwd = 2, xlim = c(0,3), main = '')
rug(ag_modern_mae_stan, col = stancol, lwd = 2)
par(mfrow = c(2,1))
hist(ag_modern_rmse, xlim = c(0,3), col = wave00col, main = 'Atmospheric growth RMSE')
rug(ag_modern_rmse_stan, col = stancol, lwd = 2 )
hist(ag01_rmse_trim, col = wave01col, xlim = c(0,3), main = '')
rug(ag_modern_rmse_stan, col = stancol, lwd = 2 )
NA
NA
NA
We’ve established that most of the original ensemble members have an ME/MAE/RMSE larger than the standard run. More of the wave01 members (though still few) perform better than standard.
# Members whose RMSE against observed atmospheric growth beats the
# standard run, per wave.
better_ix_ag_rmse <- which(ag_modern_rmse < ag_modern_rmse_stan)
better_ix_ag01_rmse <- which(ag01_modern_rmse < ag_modern_rmse_stan)
# Stack the corresponding normalized inputs from both waves for plotting.
X_better_ag <- rbind(X[better_ix_ag_rmse, ], X_wave01_train[better_ix_ag01_rmse, ])
A map of the 2D projections of parameter space where the ensemble member performs better than standard.
The blue part is the first wave, and not subject to constraint so may be removed in the second wave (wave01).
# NOTE(review): better_ix is assigned here but not used before being
# redefined further down — looks like dead code; confirm before removing.
better_ix <- 1:nrow(X_better_ag)
# One colour per row of X_better_ag: wave00 rows first, then wave01 rows,
# matching the rbind() order above.
better_cols <- c(rep(wave00col, length(better_ix_ag_rmse)), rep(wave01col,length(better_ix_ag01_rmse)))
# 2D projections of parameter space for the better-than-standard members.
pairs(X_better_ag,
col = better_cols,
gap = 0,
xlim = c(0,1), ylim = c(0,1),
pch = 19,
cex= 0.8,
lower.panel = NULL)
NA
NA
NA
Having trouble fitting RMSE, so trying mean error instead.
Why is there an odd collection at just under 1?
# There are 100 wave01 ensemble members that pass the level 2 constraints
length(level2_ix_wave01)
[1] 100
# Members whose absolute mean error beats the standard run's.
better_ix_ag_me <- which(abs(ag_modern_me) < abs(ag_modern_me_stan))
# There are 104 that have a smaller mean error than standard
better_ix_ag01_me <- which(abs(ag01_modern_me) < abs(ag_modern_me_stan))
# There are only 41 that pass level 2 AND have smaller error than standard
level2_and_better_ag01_ix <- intersect(level2_ix_wave01, better_ix_ag01_me)
This next pairs plot looks at all the ensemble members that have a better mean atmospheric growth error than standard.
# Inputs (both waves) whose mean AG error beats the standard run; this
# overwrites the earlier RMSE-based X_better_ag.
X_better_ag <- rbind(X[better_ix_ag_me, ], X_wave01_train[better_ix_ag01_me, ])
better_ix <- 1:nrow(X_better_ag)
# Semi-transparent colours: wave00 rows first, then wave01 rows, matching
# the rbind() order above.
better_cols <- c(makeTransparent(rep(wave00col, length(better_ix_ag_me)), 100), makeTransparent(rep(wave01col,length(better_ix_ag01_me)), 100))
pairs(X_better_ag,
col = better_cols,
gap = 0,
xlim = c(0,1), ylim = c(0,1),
pch = 19,
cex= 0.8,
lower.panel = NULL)
This next plot looks at all the ensemble members that have a better mean atmospheric growth error than standard AND pass the level 2 constraints.
The number is small (41/300), but the ensemble members seem spread across parameter space.
# Inputs that pass level 2 AND beat standard on mean AG error.
# BUG FIX: level2_and_better_ag01_ix indexes wave01 members (it is the
# intersection of level2_ix_wave01 and better_ix_ag01_me, both wave01
# indices), so it must subset the wave01 training design X_wave01_train,
# not the wave00 design X.
X_level2_and_better_ag01 <- X_wave01_train[level2_and_better_ag01_ix, ]
pairs(X_level2_and_better_ag01,
col = makeTransparent('black', 150),
gap = 0,
xlim = c(0,1), ylim = c(0,1),
pch = 19,
cex= 0.8,
lower.panel = NULL)
plot(fit_ag_me)
NA
NA
NA
NA
# Percentage of the uniform emulated sample retained after constraining.
# NOTE(review): X_kept_ix and nunif are defined in a hidden/earlier chunk.
(length(X_kept_ix) / nunif) * 100
[1] 21.057
This pairs plot shows the 2d and marginal density of emulated input points where the emulated atmospheric growth is closer to the observations than the standard model.
This technique might provide a useful set of points for optimising the model (at least to atmospheric growth).
#pairs(X_unif[X_kept_ix, ][1:50,],
# col = makeTransparent('black',50),
# gap = 0,
# xlim = c(0,1), ylim = c(0,1),
# pch = 19,
# cex= 0.8,
# lower.panel = NULL)
par(oma = c(0,0,0,3), bg = 'white')
# Diagonal panel for pairs(): histogram with a kernel-density overlay.
# Fixes: <- for assignment, and restore via par(usr = usr) — the original
# on.exit(par(usr)) passed an unnamed numeric vector, which does not
# restore the coordinate system.
panel.hist <- function(x, ...) {
  usr <- par("usr")
  on.exit(par(usr = usr), add = TRUE)
  # Rescale the panel's y range so the tallest density bar fits.
  par(usr = c(usr[1:2], 0, 1.5))
  hist(x, freq = FALSE, col="cyan", add=TRUE)
  lines(density(x))
}
# Pairs plot of the kept emulated inputs: 2D density in the upper panels
# (dfunc_up, defined elsewhere), histogram + density on the diagonal.
pairs(X_unif[X_kept_ix, ],
# labels = 1:d,
gap = 0, lower.panel = NULL, xlim = c(0,1), ylim = c(0,1),
panel = dfunc_up,
diag.panel = panel.hist,
cex.labels = 1,
col.axis = 'white',
dfunc_col = rb)
Next, check emulators of all the other outputs and apply the constraints to them. See how the constraints change.
# First, try the emulators built using just wave01
#fit_list_const_level1a
# Predict each constrained output over the uniform design X_unif with its
# km emulator, collecting posterior means and sds column by column.
Y_const_pred_unif_mean <- matrix(NA, ncol = ncol(Y_const_level1a_scaled), nrow = nrow(X_unif))
colnames(Y_const_pred_unif_mean) <- colnames(Y_const_level1a_scaled)
Y_const_pred_unif_sd <- matrix(NA, ncol = ncol(Y_const_level1a_scaled), nrow = nrow(X_unif))
colnames(Y_const_pred_unif_sd) <- colnames(Y_const_level1a_scaled)
# seq_along() is robust to an empty fit list (1:length(x) gives c(1, 0)).
for(i in seq_along(fit_list_const_level1a)){
  pred_unif <- predict.km(object = fit_list_const_level1a[[i]], newdata = X_unif, type = 'UK')
  Y_const_pred_unif_mean[, i] <- pred_unif$mean
  Y_const_pred_unif_sd[, i] <- pred_unif$sd
}
# Percentage of emulated points passing the level 2 constraints.
# NOTE(review): level2_ix_em is not defined in this section — presumably
# computed in a hidden chunk from Y_const_pred_unif_mean; confirm.
(length(level2_ix_em) / nunif) * 100
[1] 12.985
Emulated members passing level2 constraints AND having lower error in atmospheric growth than standard.
Red point indicates the standard input.
# Standard parameter settings, normalized into the [0,1] design space.
# NOTE(review): assumes the standard member is rep(1, 32) in raw units —
# confirm against the design/lhs setup.
X_stan_norm <- normalize(matrix(rep(1, 32), nrow = 1), wrt = lhs)
# Emulated points satisfying the level 2 carbon-cycle constraints
# (NBP > 0; 35 < NPP < 80; 750 < cSoil < 3000; 300 < cVeg < 800) AND with
# smaller absolute mean atmospheric-growth error than the standard run.
# NOTE(review): pred_unif_ag is presumably an AG emulator prediction from
# a hidden chunk; confirm it aligns row-wise with X_unif.
level2_and_ag_ix_em <- which(Y_const_pred_unif_mean[,'nbp_lnd_sum'] > 0 &
Y_const_pred_unif_mean[,'npp_nlim_lnd_sum'] > 35 & Y_const_pred_unif_mean[,'npp_nlim_lnd_sum'] < 80 &
Y_const_pred_unif_mean[,'cSoil_lnd_sum'] > 750 & Y_const_pred_unif_mean[,'cSoil_lnd_sum'] < 3000 &
Y_const_pred_unif_mean[,'cVeg_lnd_sum'] > 300 & Y_const_pred_unif_mean[,'cVeg_lnd_sum'] < 800 &
abs(pred_unif_ag$mean) < abs(ag_modern_me_stan)
)
# Pairs density plot of the emulated inputs passing both constraints,
# with the normalized standard input appended as the 'truth' point
# (rendered by dfunc_up_truth, defined elsewhere).
pairs(rbind(X_unif[level2_and_ag_ix_em, ], X_stan_norm),
# labels = 1:d,
gap = 0, lower.panel = NULL, xlim = c(0,1), ylim = c(0,1),
panel = dfunc_up_truth,
diag.panel = panel.hist,
cex.labels = 1,
col.axis = 'white',
dfunc_col = rb)
# Percentage of emulated points passing level 2 AND the AG constraint.
(length(level2_and_ag_ix_em) / nunif) * 100
[1] 5.446